import fileModel
from scipy import *
from scipy.sparse import *


def prepereInput(path):
    """Write word co-occurrence "pseudo-documents" to ``path + ".w2d"``.

    A word-by-document count matrix ``C`` is built from the corpus, then
    ``C * C^T`` is computed, whose entry ``(i, j)`` is the sum over all
    documents of ``count(word i) * count(word j)``. One output line is
    written per word id ``i``: for every word id ``j`` in ascending order,
    ``id2word[j]`` followed by a single space is repeated ``(C * C^T)[i, j]``
    times. Each line ends with a newline.

    Args:
        path: Corpus path passed to ``fileModel.read_data`` and
            ``fileModel.read_wordMap``; the output goes to ``path + ".w2d"``.
            NOTE(review): assumes ``read_data`` returns a sized sequence of
            documents, each an iterable of words, and that ``wordmap`` maps
            each word to a row index in ``0 .. len(wordmap) - 1`` that is
            also a valid index into ``id2word`` -- confirm against fileModel.

    Raises:
        KeyError: If a word in the data is missing from ``wordmap``
            (when ``wordmap`` is a dict).
    """
    data = fileModel.read_data(path)
    wordmap, id2word = fileModel.read_wordMap(path)
    docNum = len(data)
    wordNum = len(wordmap)
    print("docNum:" + str(docNum), "wordNum:" + str(wordNum))

    # A 64-bit dtype is required here: with int8 any count or co-occurrence
    # total above 127 silently wraps (often to a negative value), which made
    # the output drop or miscount words. The dtype is given as a string so
    # it does not rely on a star-imported name.
    mat = lil_matrix((wordNum, docNum), dtype="int64")
    for docIndex, line in enumerate(data):
        for word in line:
            mat[wordmap[word], docIndex] += 1

    # Word-by-word co-occurrence matrix, in CSR form so each row's stored
    # entries can be walked directly instead of probing every (i, j) cell.
    mat = (mat * mat.transpose()).tocsr()
    # Sorted column indices keep the output in ascending word-id order.
    mat.sort_indices()
    print(mat.shape)

    indptr = mat.indptr
    indices = mat.indices
    counts = mat.data

    writer = fileModel.open_file(path + ".w2d", 'w')
    try:
        for row in range(wordNum):
            start, end = indptr[row], indptr[row + 1]
            # tolist() yields plain Python ints for indexing and repetition.
            cols = indices[start:end].tolist()
            reps = counts[start:end].tolist()
            pieces = [(id2word[col] + " ") * rep for col, rep in zip(cols, reps)]
            writer.write("".join(pieces) + "\n")
    finally:
        # Close the output even if a lookup or write fails part-way through.
        writer.close()


# Path prefix of the directory holding the input file.
root = "data/news_o/"

# Runs on import/execution of this module: processes <root>test.txt.
prepereInput("".join((root, "test.txt")))
